import pandas as pd
import re
# Preprocess the raw Dangdang book listing into typed columns.
# Reads '../static/data/dangdang.csv' and writes
# '../static/data/book_info_pre.csv'; both paths are resolved against the
# current working directory.

# Patterns are raw strings so that '\d' and '\|' reach the regex engine
# untouched instead of being treated as (invalid) string escape sequences.
_DATE_RE = re.compile(r'\d{4}-\d{2}-\d{2}')
_YEAR_MONTH_RE = re.compile(r'\d{4}-\d{2}')
_DIGIT_RE = re.compile(r'\d')
_HOUSE_RE = re.compile(r'\|(.*?社)')
_HOUSE_STRIP_RE = re.compile(r'\|.*?社')
_DISCOUNT_RE = re.compile(r'(.*?)折')
# A decimal number with optional thousands separators, or a bare fraction.
_AMOUNT_RE = re.compile(r'\d[\d,]*(?:\.\d+)?|\.\d+')


def _parse_count(text):
    """Return the integer formed by every digit in *text*.

    A missing value (NaN/None) or a string without digits yields 0.
    """
    if pd.isnull(text):
        return 0
    digits = ''.join(_DIGIT_RE.findall(text))
    return int(digits) if digits else 0


def _parse_price(text):
    """Return the amount that follows the first '¥' in *text* as a float.

    The number is parsed as written, so '¥25.80', '¥25.8' and '¥1,299' give
    25.8, 25.8 and 1299.0. (Stripping every non-digit and dividing by 100
    is only correct for prices with exactly two decimals.)

    Raises:
        IndexError: if *text* contains no '¥'.
        ValueError: if no number follows the '¥'.
    """
    amount = text.split('¥')[1]
    match = _AMOUNT_RE.search(amount)
    if match is None:
        raise ValueError('no numeric amount found in price: %r' % (text,))
    return float(match.group().replace(',', ''))


df = pd.read_csv('../static/data/dangdang.csv')

df['rank'] = df['rank'].astype(int)
df['comment_count'] = df['comment_count'].apply(_parse_count)
# Drop the trailing three characters before converting to float
# (presumably a suffix such as '%推荐' -- confirm against the raw data).
df['recommend_percent'] = (
    df['recommend_percent'].apply(lambda x: x[:-3]).astype(float)
)

# Missing publisher info becomes '' so the regex steps below always get a str.
df['publisher_info'] = df['publisher_info'].apply(
    lambda x: '' if pd.isnull(x) else x
)
df['publish_date'] = df['publisher_info'].apply(
    lambda x: ''.join(_DATE_RE.findall(x))
)
df['publish_year_month'] = df['publish_date'].apply(
    lambda x: ''.join(_YEAR_MONTH_RE.findall(x))
)
# Replace each date with '|' so it can serve as the marker that precedes the
# publishing house in the two extractions below.
df['publish_info_new'] = df['publisher_info'].apply(
    lambda x: _DATE_RE.sub('|', x)
)
# Text between the '|' marker and the first following '社'.
df['publish_house'] = df['publish_info_new'].apply(
    lambda x: ''.join(_HOUSE_RE.findall(x))
)
# Whatever remains once the '|...社' segment has been removed.
df['publisher'] = df['publish_info_new'].apply(
    lambda x: _HOUSE_STRIP_RE.sub('', x)
)

df['price_n'] = df['price_n'].apply(_parse_price).astype(float)
df['price_r'] = df['price_r'].apply(_parse_price).astype(float)
# Numeric text that precedes '折'.
df['price_s'] = df['price_s'].apply(
    lambda x: ''.join(_DISCOUNT_RE.findall(x))
).astype(float)

df_pre = df[[
    'rank',
    'name',
    'comment_count',
    'recommend_percent',
    'price_n',
    'price_r',
    'price_s',
    'publish_date',
    'publish_year_month',
    'publish_house',
    'publisher',
]]
# utf_8_sig writes a BOM at the start of the output file.
df_pre.to_csv(
    '../static/data/book_info_pre.csv', encoding='utf_8_sig', index=False
)